Vorhersage Kandidaten Bundestagswahl 2021

Cédric Scherer https://cedricscherer.com , Ansgar Wolsing
2021-09-25

Setup

pacman::p_load("tidyverse", "here", "glue", "colorspace", "gsheet", "labelled")

Daten

Tile Map

## csv mit nur Wahlkreisen, für Tile Map
#write_csv(dat_erst_winneronly_dw %>% dplyr::select(Wahlkreisname), here::here("data", "wk.csv"))

## Grid Preparation für Labels und Bundesländer
# grid <- 
#   read_csv(here::here("data", "wk_grid_b.csv")) %>% 
#   mutate(
#     id = str_sub(Wahlkreisname, 1, 3),
#     id_num = as.numeric(id),
#     wk = str_sub(Wahlkreisname, 5, nchar(Wahlkreisname)),
#     land = case_when(
#       id_num %in% 1:11 ~ "Schleswig-Holstein",
#       id_num %in% 12:17 ~ "Mecklenburg-Vorpommern",
#       id_num %in% 18:23 ~ "Hamburg",
#       id_num %in% 24:53 ~ "Niedersachsen",
#       id_num %in% 54:55 ~ "Bremen",
#       id_num %in% 56:65 ~ "Brandenburg",
#       id_num %in% 66:74 ~ "Sachsen-Anhalt",
#       id_num %in% 75:86 ~ "Berlin",
#       id_num %in% 87:150 ~ "Nordrhein-Westfalen",
#       id_num %in% 151:166 ~ "Sachsen",
#       id_num %in% 167:188 ~ "Hessen",
#       id_num %in% 189:196 ~ "Thüringen",
#       id_num %in% 197:211 ~ "Rheinland-Pfalz",
#       id_num %in% 212:257 ~ "Bayern",
#       id_num %in% 258:295 ~ "Baden-Württemberg",
#       id_num %in% 296:299 ~ "Saarland"
#     )
#   )
# 
# write_csv(grid, here::here("data", "de_constituencies_grid_b.csv"))

grid <- read_csv(here::here("data", "de_constituencies_grid_b.csv"))
Rows: 299 Columns: 7
── Column specification ──────────────────────────────────────────────
Delimiter: ","
chr (4): Wahlkreisname, id, wk, land
dbl (3): row, col, id_num

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Vorhersage der Direktmandatsgewinner*innen 2021

Source: Kayser, Leininger, Murr & Stötzer (2021) Citizens’ Forecast for the 2021 German National Election https://aleininger.eu/citizens_forecast2021/

sheet_url <- "https://docs.google.com/spreadsheets/d/1xOg9kNRMfmUXoJAYNp7R93UHfPlVus-2VV-i-Rdx1Uc/edit#gid=0"
dat_erst <- gsheet2tbl(sheet_url)
#dat_erst <- read_csv(here::here("data", "Buerger_innenvorhersage 2021 - Prognose.csv"))


#' t-Test auf Basis von Mittelwert und Standardabweichung 
t_test_from_summary <- function(m1, m2, sd1, sd2, n1, n2, ...) {
  group1 <- scale(1:n1)*sd1 + m1
  group2 <- scale(1:n2)*sd2 + m2
  t.test(group1, group2, ...)
}


dat_erst_ttest <- dat_erst %>% 
  mutate(Stimmenanteil_Mean = ifelse(Stimmenanteil_Mean > 1000, 
                                     Stimmenanteil_Mean / 1000, 
                                     Stimmenanteil_Mean)) %>% 
  group_by(wkr) %>% 
  slice_max(order_by = Stimmenanteil_Mean, n = 2, with_ties = FALSE) %>% 
  mutate(rank = rank(-Stimmenanteil_Mean, ties.method = "first")) %>% 
  ungroup() %>% 
  select(wkr, rank, Stimmenanteil_Mean, Stimmenanteil_SD, obs) %>% 
  pivot_wider(id_cols = wkr, 
              names_from = "rank", 
              values_from = c("Stimmenanteil_Mean", "Stimmenanteil_SD", "obs")) %>% 
  mutate(t_test = pmap(list(m1 = Stimmenanteil_Mean_1, m2 = Stimmenanteil_Mean_2,
                      sd1 = Stimmenanteil_SD_1, sd2 = Stimmenanteil_SD_2,
                      n1 = obs_1, n2 = obs_2, alternative = "greater"), 
                      t_test_from_summary),
         t = map_dbl(t_test, "statistic"),
         p_value = map_dbl(t_test, "p.value"))

dat_erst_winneronly_dw <- 
  dat_erst %>%
  group_by(wkr) %>%
  dplyr::select(wkr, Wahlkreisname, party, kandidate_name, obs, Gewinner_share) %>%
  mutate(Gewinner_share = ifelse(Gewinner_share > 100, Gewinner_share / 1000, Gewinner_share),
         rank = rank(Gewinner_share, ties.method = "first"),
         name = to_character(wkr),
         Gewinner_share = round(Gewinner_share*100))  %>%
  filter(rank > 5) %>%
  mutate(party = as.character(party)) %>%
  unite(val, party, Gewinner_share, kandidate_name) %>%
  spread(rank,val) %>%
  separate("7",into = c("first-place-party", "first-place-votes", "winner"),"_") %>%
  separate("6",into = c("second-place-party", "second-place-votes", "second"),"_") %>%
  mutate(outcome = ifelse(`first-place-votes` == `second-place-votes`, 'Kopf-an-Kopf', `first-place-party`)) %>%
  relocate("outcome", "first-place-party", "first-place-votes", "winner", .before = "second-place-party") %>% 
  ungroup() %>% 
  # add t-test statistic
  inner_join(dat_erst_ttest, by = "wkr")

Kombinierte Datensätze

dat_winneronly_grid <- 
  dat_erst_winneronly_dw %>% 
  left_join(grid) %>% 
  mutate(
    outcome_agg = ifelse(outcome %in% c("CDU", "CSU"), "CDU/CSU", outcome),
    diff = as.numeric(`first-place-votes`) - as.numeric(`second-place-votes`)
  )
Joining, by = "Wahlkreisname"

Visualisierungen

Setup

theme_set(theme_void(base_size = 16, base_family = "Noto Serif"))
theme_update(legend.margin = margin(0, 0, 0, 25),
             legend.text = element_text(margin = margin(5, 0, 5, 0)),
             plot.title = element_text(hjust = .5, face = "bold", 
                                       lineheight = 1.1, margin = margin(t = 10, b = 20)),
             plot.subtitle = element_text(hjust = .5, color = "grey40", size = 15,
                                          margin = margin(t = -8, b = 18)),
             plot.title.position = "plot",
             plot.caption = element_text(hjust = 0, color = "grey40", 
                                         lineheight = 1.3,
                                         size = 10, margin = margin(20, 0, 5, 0)),
             plot.caption.position = "plot",
             plot.margin = margin(10, 0, 10, 0))

# Party colors
party_colors <- c("CDU/CSU" = "grey9",
                  #"CSU" = "grey18",
                  "SPD" = "#ca0002", ## "#E3000F",  a bit darker now to make it work with CVD
                  "AfD" = rgb(0, 158, 224, maxColorValue = 255),
                  "FDP" = darken("#ffed00", 0.1),
                  "Linke" = "purple",
                  "Grüne" = rgb(100, 161, 45, maxColorValue = 255))

caption <- "Grafik: Cédric Scherer & Ansgar Wolsing\nDaten: Kayser, Leininger, Murr & Stötzer (2021) Citizens’ Forecast for the 2021 German National Election (aleininger.eu/citizens_forecast2021)"
title <- "Bürger*innenvorhersage der Direktmandatsgewinner*innen\nin den Wahlkreisen zur Bundestagswahl 2021"

Bubble Map

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(aes(color = outcome_agg), size = 10) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")),
                     name = NULL) +
  guides(color = guide_legend(override.aes = list(size = 6))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption)
ggsave(here::here("plots", "bubble_map.pdf"), width = 10, height = 13, device = cairo_pdf)

Variante mit gedämpfteren Farben für die Füllung der Punkte

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg), 
    size = 10, shape = 21, stroke = 2, alpha = .5, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 10, shape = 21, stroke = 2, fill = "transparent"
  ) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  guides(color = guide_legend(override.aes = list(size = 6))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption)
ggsave(here::here("plots", "bubble_map_var.pdf"), width = 10, height = 13, device = cairo_pdf)

Bubble Map inkl. Vorsprung als Farbintensität

Vorsprung in Prozentpunkten

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg, alpha = diff), 
    size = 9, shape = 21, stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 9, shape = 21, stroke = 2, fill = "transparent"
  ) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  guides(fill = guide_legend(override.aes = list(size = 6, alpha = .9))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption,
       subtitle = "Je intensiver die Färbung, desto größer ist der vorhergesagte Vorsprung.")
ggsave(here::here("plots", "bubble_map_diff.pdf"), width = 10, height = 13, device = cairo_pdf)

Vorsprung als t-Statistik

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg, 
        # alpha = diff
        alpha = t # Ergebnis t-Test (oder p-value stattdessen?)
        ), 
    size = 9, shape = 21, stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 9, shape = 21, stroke = 2, fill = "transparent"
  ) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  guides(fill = guide_legend(override.aes = list(size = 6, alpha = .9))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption,
       subtitle = "Je intensiver die Färbung, desto größer ist der vorhergesagte Vorsprung.")
ggsave(here::here("plots", "bubble_map_diff.pdf"), width = 10, height = 13, device = cairo_pdf)

Bubble Map inkl. Grenzen der Bundesländer (trial)

ggplot(dat_winneronly_grid, aes(col, row)) +
  ggforce::geom_mark_hull(
    aes(group = land), 
    color = "white",
    expand = unit(0, "mm")
  ) +
  geom_point(aes(color = outcome_agg), size = 7) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")),
                     name = NULL) +
  guides(color = guide_legend(override.aes = list(size = 6))) +
  theme(legend.position = c(.87, .25),
        plot.background = element_rect(color = "grey67", fill = "grey67")) +
  labs(title = title, caption = caption)
ggsave(here::here("plots", "bubble_map_states.pdf"), width = 10, height = 13, device = cairo_pdf)

Bubble Map mit verschiedenen Punktgrößen für Bundesländer (trial)

Weder effektiv noch schön.

# Größenkategorien für Bundesländer
bland_group_mapping <- c(
  "Schleswig-Holstein" = "A",
  "Mecklenburg-Vorpommern" = "B",
  "Hamburg" = "D",
  "Niedersachsen" = "C",
  "Bremen" = "E",
  "Brandenburg" = "A",
  "Sachsen-Anhalt" = "D",
  "Berlin" = "A",
  "Nordrhein-Westfalen" = "D",
  "Sachsen" = "A",
  "Hessen" = "E",
  "Thüringen" = "B",
  "Rheinland-Pfalz" = "A",
  "Bayern" = "D",
  "Baden-Württemberg" = "B",
  "Saarland" = "A"
)

dat_winneronly_grid %>% 
  mutate(mark_group = bland_group_mapping[land],
         mark_shape = case_when(
           mark_group == "A" ~ 21,
           mark_group == "B" ~ 22,
           mark_group == "C" ~ 23,
           mark_group == "D" ~ 24,
           mark_group == "E" ~ 25,
         )) %>% 
  ggplot(aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg, alpha = diff, size = mark_group), 
    # size = 9, 
    shape = 21, 
    stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg, size = mark_group), 
    # size = 9, 
    shape = 21, 
    stroke = 2, fill = "transparent"
  ) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  scale_size_discrete(range = c(7, 11)) +
  # scale_shape_identity() +
  guides(fill = guide_legend(override.aes = list(size = 6, alpha = .9)),
         size = "none") +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption,
       subtitle = "Je intensiver die Färbung, desto größer ist der vorhergesagte Vorsprung.")
Warning: Using size for a discrete variable is not advised.
ggsave(here::here("plots", "bubble_map_diff_bland_mark.pdf"), width = 10, height = 13, device = cairo_pdf)

Bubble Map inkl. Vorsprung als Farbintensität - mit Städten zur Orientierung

constituencies_to_highlight <- c(
  18, # Hamburg-Mitte
  75, # Berlin-Mitte
  93, # Köln I
  220 # München-West/Mitte
)

df_constituencies_highlight <- dat_winneronly_grid %>% 
  filter(wkr %in% constituencies_to_highlight) %>% 
  mutate(name_short = str_extract(Wahlkreisname, "[a-zA-ZäöüÄÖÜ]+")) %>% 
  select(name_short, name, col, row)

Städtenamen als Overlay

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg, alpha = diff), 
    size = 9, shape = 21, stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 9, shape = 21, stroke = 2, fill = "transparent"
  ) +
  geom_label(data = df_constituencies_highlight,
             aes(label = name_short),
             col = "grey99",
             fill = "grey10", alpha = 0.4, family = "Roboto",
             label.size = 0, 
             fontface = "bold", size = 6,
             hjust = 0.5) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  guides(fill = guide_legend(override.aes = list(size = 6, alpha = .9))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption,
       subtitle = "Je intensiver die Färbung, desto größer ist der vorhergesagte Vorsprung.")
ggsave(here::here("plots", "bubble_map_diff_location_highlights.pdf"), width = 10, height = 13, device = cairo_pdf)

Städtenamen im Shape

ggplot(dat_winneronly_grid, aes(col, row)) +
  geom_point(
    aes(fill = outcome_agg, alpha = diff), 
    size = 9, shape = 21, stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 9, shape = 21, stroke = 2, fill = "transparent"
  ) +
  geom_text(data = df_constituencies_highlight,
             aes(label = name_short),
             col = "grey2",
             family = "Roboto", size = 3,
             hjust = 0.5) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  guides(fill = guide_legend(override.aes = list(size = 6, alpha = .9))) +
  theme(legend.position = c(.87, .25)) +
  labs(title = title, caption = caption,
       subtitle = "Je intensiver die Färbung, desto größer ist der vorhergesagte Vorsprung.")
ggsave(here::here("plots", "bubble_map_diff_location_highlights-2.pdf"), width = 10, height = 13, device = cairo_pdf)

Interaktive Version via {ggiraph}

library(ggiraph)

g_interactive <- dat_winneronly_grid %>% 
  mutate(label = str_wrap(glue::glue("{Wahlkreisname} ({land})<br><br>Vorsprung:<br>{outcome} {diff}%"), 50)) %>% 
  ggplot(aes(col, row)) +
  geom_point_interactive(
    aes(fill = outcome_agg, alpha = diff, tooltip = label, data_id = label), 
    size = 9, shape = 21, stroke = 2, color = "transparent"
  ) +
  geom_point(
    aes(color = outcome_agg), 
    size = 9, shape = 21, stroke = 2, fill = "transparent"
  ) +
  coord_fixed() +
  scale_x_continuous(limits = c(-.5, max(grid$col) + 2)) +
  scale_y_reverse() +
  scale_color_manual(values = c(party_colors, c("Kopf-an-Kopf" = "grey85")), name = NULL) +
  scale_fill_manual(values = c(party_colors, c("Kopf-an-Kopf" = "transparent")), name = NULL) +
  scale_alpha(range = c(0, .86), guide = "none") +
  guides(color = guide_legend(override.aes = list(size = 6)))

tooltip_css <- "background-color:#515151;color:white;font-family:Roboto;padding:10px;border-radius:5px;"

girafe(ggobj = g_interactive, 
       width_svg = 12, height_svg = 12, 
       options = list(
         opts_sizing(rescale = FALSE),
         opts_tooltip(offx = 50, css = tooltip_css)
       ))
#ggsave(here::here("plots", "bubble_map_states.pdf"), width = 10, height = 13, device = cairo_pdf)

Session Info
## DO NOT REMOVE!
Sys.time()
[1] "2021-09-25 02:20:17 CEST"
#git2r::repository() ## uncomment if you are using GitHub
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Big Sur 11.4

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods  
[7] base     

other attached packages:
 [1] ggiraph_0.7.10   labelled_2.8.0   gsheet_0.4.5    
 [4] colorspace_2.0-2 glue_1.4.2       here_1.0.1      
 [7] forcats_0.5.1    stringr_1.4.0    dplyr_1.0.7     
[10] purrr_0.3.4      readr_2.0.1      tidyr_1.1.3     
[13] tibble_3.1.4     ggplot2_3.3.5    tidyverse_1.3.1 

loaded via a namespace (and not attached):
 [1] fs_1.5.0          lubridate_1.7.10  bit64_4.0.5      
 [4] httr_1.4.2        rprojroot_2.0.2   tools_4.1.1      
 [7] backports_1.2.1   bslib_0.2.5.1     utf8_1.2.2       
[10] R6_2.5.1          DBI_1.1.1         withr_2.4.2      
[13] tidyselect_1.1.1  downlit_0.2.1     bit_4.0.4        
[16] curl_4.3.2        compiler_4.1.1    textshaping_0.3.5
[19] cli_3.0.1         rvest_1.0.1       pacman_0.5.1     
[22] xml2_1.3.2        labeling_0.4.2    sass_0.4.0       
[25] scales_1.1.1      systemfonts_1.0.2 digest_0.6.27    
[28] rmarkdown_2.10    pkgconfig_2.0.3   htmltools_0.5.1.1
[31] dbplyr_2.1.1      highr_0.9         htmlwidgets_1.5.3
[34] rlang_0.4.11      readxl_1.3.1      rstudioapi_0.13  
[37] jquerylib_0.1.4   farver_2.1.0      generics_0.1.0   
[40] jsonlite_1.7.2    vroom_1.5.4       distill_1.2      
[43] magrittr_2.0.1    Rcpp_1.0.7        munsell_0.5.0    
[46] fansi_0.5.0       lifecycle_1.0.0   stringi_1.7.3    
[49] yaml_2.2.1        MASS_7.3-54       grid_4.1.1       
[52] parallel_4.1.1    crayon_1.4.1      haven_2.4.3      
[55] hms_1.1.0         knitr_1.33        pillar_1.6.2     
[58] uuid_0.1-4        reprex_2.0.1      evaluate_0.14    
[61] V8_3.4.2          modelr_0.1.8      vctrs_0.3.8      
[64] tzdb_0.1.2        tweenr_1.0.2      cellranger_1.1.0 
[67] gtable_0.3.0      polyclip_1.10-0   assertthat_0.2.1 
[70] xfun_0.25         ggforce_0.3.3     broom_0.7.9      
[73] ragg_1.1.3        concaveman_1.1.0  ellipsis_0.3.2